- Notifications
You must be signed in to change notification settings - Fork 849
/
Copy pathOML4Py Data Cleaning Missing Data.dsnb
executable file
·1 lines (1 loc) · 7.92 KB
/
OML4Py Data Cleaning Missing Data.dsnb
1
[{"layout":null,"template":null,"templateConfig":null,"name":"OML4Py Data Cleaning Missing Data","description":null,"readOnly":false,"type":"low","paragraphs":[{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":null,"title":null,"message":["%python"," "],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","# OML4Py Data Cleaning: fill in missing data","In this notebook, we demonstrate how to fill in missing values using OML4Py.","","We use the customer insurance lifetime value data set which contains customer financial information, lifetime value, and whether or not the customer bought insurance.","","The dataset `CUSTOMER_INSURANCE_LTV_PY` is generated by the `\"OML Run-me-first\"` notebook, which `MUST` be run before this notebook.","","---","","###### IMPORTANT: The `\"OML Run-me-first\"` notebook is available under the menu `Templates -> Examples` and is a pre-requisite to the current notebook.","","---","","**Note**: when building or applying a model using in-database Oracle Machine Learning algorithms, this operation may *not* be needed separately if automatic data preparation is enabled. Automatic data preparation automatically replaces missing values of numerical attributes with the mean and missing values of categorical attributes with the mode. For example, see <a href=\"https://docs.oracle.com/en/database/oracle/machine-learning/oml4sql/23/dmcon/generalized-linear-model.html#GUID-19B8E133-0029-4892-88BB-3E1C9E83EB12\" onclick=\"return ! window.open('https://docs.oracle.com/en/database/oracle/machine-learning/oml4sql/23/dmcon/generalized-linear-model.html#GUID-19B8E133-0029-4892-88BB-3E1C9E83EB12');\"> Data Preparation for GLM <\/a>","","","Copyright (c) 2024 Oracle Corporation ","###### <a href=\"https://oss.oracle.com/licenses/upl/\" onclick=\"return ! window.open('https://oss.oracle.com/licenses/upl/');\">The Universal Permissive License (UPL), Version 1.0<\/a>","---"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"For more information ...","message":["%md","","* <a href=\"https://docs.oracle.com/en/cloud/paas/autonomous-data-warehouse-cloud/index.html\" target=\"_blank\">Oracle ADW Documentation<\/a>","* <a href=\"https://github.com/oracle/oracle-db-examples/tree/master/machine-learning\" target=\"_blank\">OML folder on Oracle GitHub<\/a>","* <a href=\"https://www.oracle.com/machine-learning\" target=\"_blank\">OML Web Page<\/a>","* <a href=\"https://docs.oracle.com/en/database/oracle/machine-learning/oml4py/2/mlpug/prepare-and-explore-data1.html\" target=\"_blank\">OML4Py Data Preparation<\/a>"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Import libraries ","message":["%python","","import warnings","warnings.filterwarnings('ignore')","","import pandas as pd","import oml","from oml import automl"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":"Get proxy object for CUSTOMER_INSURANCE_LTV_PY table","message":["%python","","CUST_DF = oml.sync(table = 'CUSTOMER_INSURANCE_LTV_PY')"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Data Overview","message":["%python","","z.show(CUST_DF.head())"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Columns MARITAL_STATUS, CREDIT_BALANCE, BANK_FUNDS, INCOME have missing values","message":["%python","","z.show(CUST_DF[CUST_DF['CUSTOMER_ID'] < 'CU2'][['CUSTOMER_ID','MARITAL_STATUS', 'CREDIT_BALANCE', 'BANK_FUNDS', 'INCOME']].head())"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Find mode for the categorical column MARITAL_STATUS","message":["%python","","def col_mode(DF, col):"," SUMMARY_DF = CUST_DF.dropna().crosstab([col]).sort_values('count').tail(1)"," return SUMMARY_DF.pull()[col].values[0]","","mode = col_mode(CUST_DF, 'MARITAL_STATUS')","","print(mode)"," "],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Replace None with 'SINGLE' and then check the counts","message":["%python","","CUST_CAT_DF = CUST_DF.replace(old = [None], new = [mode], columns = ['MARITAL_STATUS']) ","","z.show(CUST_CAT_DF.crosstab('MARITAL_STATUS'))"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"raw","title":"Replace the missing numerical columns using the mean value","message":["%python","","num_cols = ['CREDIT_BALANCE', 'BANK_FUNDS', 'INCOME']","","mean_vals = [float(CUST_DF[col].mean()) for col in num_cols]","CUST_NUM_DF = CUST_DF","","print(mean_vals)","for i, col in enumerate(num_cols):"," CUST_NUM_DF = CUST_NUM_DF.replace(old = [None], new = [mean_vals[i]], columns = [col]) "],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"table","title":"Show the OML proxy object after the replacement","message":["%python","","z.show(CUST_NUM_DF[CUST_NUM_DF['CUSTOMER_ID'] <'CU2'][ ['CUSTOMER_ID'] + num_cols].head())"],"enabled":true,"result":null,"sizeX":0,"hideCode":false,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":true,"hideVizConfig":false,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md","","## End of Script"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":false,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"},{"col":0,"visualizationConfig":null,"hideInIFrame":false,"selectedVisualization":"html","title":null,"message":["%md"],"enabled":true,"result":null,"sizeX":0,"hideCode":true,"width":12,"hideResult":true,"dynamicFormParams":null,"row":0,"hasTitle":false,"hideVizConfig":true,"hideGutter":true,"relations":[],"forms":"[]"}],"version":"6","snapshot":false,"tags":null}]